library(tidyverse)
library(lubridate)
library(themebg)
# Folder holding the CSSE daily-report CSV files.
data_dir <- "csse_covid_19_data/csse_covid_19_daily_reports"

# Earlier exploration of the daily reports, kept for reference (not run):
# f <- list.files(data_dir, pattern = "csv", full.names = TRUE)

# x <- map(f, read_csv)
# raw_daily <- map_df(f, read_csv)
# day1 <- read_csv("csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv")
# yest <- read_csv("csse_covid_19_data/csse_covid_19_daily_reports/03-22-2020.csv")

# Daily report for a single date. The path is built from data_dir so the
# folder name is spelled in one place only.
curr <- read_csv(file.path(data_dir, "03-24-2020.csv"))
# Read one of the global time-series CSVs and reshape it from wide (one column
# per date) to long (one row per state/country/date).
#
# metric: "confirmed", "deaths" or "recovered". Selects the file
#     time_series_covid19_<metric>_global.csv and names the value column.
# ts_dir: folder containing the time-series CSV files.
#
# Returns a tibble with the id columns state, country, lat, long, followed by
# date (Date) and a value column named after `metric`. Every column other than
# the four id columns is treated as a date column.
read_global_ts <- function(
    metric,
    ts_dir = "csse_covid_19_data/csse_covid_19_time_series"
) {
    ts_file <- file.path(
        ts_dir,
        paste0("time_series_covid19_", metric, "_global.csv")
    )

    read_csv(ts_file) %>%
        rename(
            state = `Province/State`,
            country = `Country/Region`,
            lat = Lat,
            long = Long
        ) %>%
        pivot_longer(
            cols = c(-state, -country, -lat, -long),
            names_to = "date",
            values_to = metric
        ) %>%
        # Column headers are parsed as month/day/two-digit-year dates.
        mutate(date = as_date(date, format = "%m/%d/%y", tz = "UTC"))
}

df_confirmed <- read_global_ts("confirmed")

df_deaths <- read_global_ts("deaths")

df_recovered <- read_global_ts("recovered")

# Combine the three long tables into one table with confirmed, deaths and
# recovered side by side.
#
# The join keys are spelled out instead of relying on "all shared columns":
# the implicit version also matched on lat/long, so any row whose coordinates
# were written differently between files silently failed to match. lat/long
# are dropped from the right-hand tables so the result keeps a single copy
# (from df_confirmed) and the output columns are the same as before.
df_ts <- df_confirmed %>%
    left_join(
        select(df_deaths, -lat, -long),
        by = c("state", "country", "date")
    ) %>%
    left_join(
        select(df_recovered, -lat, -long),
        by = c("state", "country", "date")
    )
# Country-level daily totals: sum the state/province rows for each country and
# date (missing values ignored), then derive active cases and day-over-day
# changes. The result stays grouped by country.
df_country <- df_ts %>%
    group_by(country, date) %>%
    summarize(
        confirmed = sum(confirmed, na.rm = TRUE),
        deaths = sum(deaths, na.rm = TRUE),
        recovered = sum(recovered, na.rm = TRUE)
    ) %>%
    # Group by country only, so lag() looks at the previous row of the same
    # country; the first row of each country therefore gets NA.
    group_by(country) %>%
    mutate(
        active = confirmed - deaths - recovered,
        new_cases = confirmed - lag(confirmed),
        new_deaths = deaths - lag(deaths)
    )

# Earliest date on which each country has at least one confirmed case:
# keep rows with cases, sort by date, and take the first row per country.
df_case1 <- df_country %>%
    filter(confirmed > 0) %>%
    arrange(date, country) %>%
    group_by(country) %>%
    distinct(country, .keep_all = TRUE) %>%
    select(country, date_case1 = date)

# Earliest date on which each country has more than 50 confirmed cases.
# NOTE(review): the comparison is strictly greater than 50 -- confirm whether
# "50 or more" was intended.
df_case50 <- df_country %>%
    filter(confirmed > 50) %>%
    arrange(date, country) %>%
    group_by(country) %>%
    distinct(country, .keep_all = TRUE) %>%
    select(country, date_case50 = date)
    
# Country-level series enriched with each country's milestone dates and the
# number of days elapsed since them.
#
# The joins name their key explicitly so that adding a column to either side
# cannot silently change what is matched on. Day offsets are converted to plain
# numbers in the same step that computes them; they are NA for countries with
# no milestone date, and negative for dates before the milestone.
df_data <- df_country %>%
    left_join(df_case1, by = "country") %>%
    left_join(df_case50, by = "country") %>%
    mutate(
        day_case1 = as.numeric(difftime(date, date_case1, units = "days")),
        day_case50 = as.numeric(difftime(date, date_case50, units = "days")),
        # Flag used by the plots to single out the US series.
        usa = country == "US",
        # Character copy of the date, used as the animation frame label.
        str_date = as.character(date)
    )

# The 25 countries with the highest confirmed count on their most recent row:
# sort newest-first, keep the first row per country, then rank by confirmed
# (top_n keeps ties).
df_top25 <- df_data %>%
    ungroup() %>%
    arrange(desc(date), country) %>%
    distinct(country, .keep_all = TRUE) %>%
    top_n(25, confirmed)

# The 12 countries with the highest confirmed count on their most recent row:
# sort newest-first, keep the first row per country, then rank by confirmed
# (top_n keeps ties).
df_top12 <- df_data %>%
    ungroup() %>%
    arrange(desc(date), country) %>%
    distinct(country, .keep_all = TRUE) %>%
    top_n(12, confirmed)

# Plotting base table: only country/date rows with at least one confirmed
# case, plus the death rate as a percentage of confirmed cases. The ratio is
# computed row by row, so no grouping is needed; the result is ungrouped.
df_plt <- df_data %>%
    ungroup() %>%
    filter(confirmed > 0) %>%
    mutate(death_rate = deaths / confirmed * 100)

# Calendar-date series restricted to the top-12 countries.
df_plt_top12 <- df_plt %>%
    semi_join(df_top12, by = "country")

# Series aligned on days since the first confirmed case (day 0 onward),
# overall and restricted to the top-25 / top-12 countries.
df_plt_day1 <- df_data %>%
    filter(day_case1 >= 0)

df_plt_day1_top25 <- df_plt_day1 %>%
    semi_join(df_top25, by = "country")

df_plt_day1_top12 <- df_plt_day1 %>%
    semi_join(df_top12, by = "country")

# Series aligned on days since the case-50 milestone date (day 0 onward),
# overall and restricted to the top-25 countries.
df_plt_day50 <- df_data %>%
    filter(day_case50 >= 0)

df_plt_day50_top25 <- df_plt_day50 %>%
    semi_join(df_top25, by = "country")
library(plotly)

Top 12 countries by confirmed cases

# Interactive line chart: cumulative confirmed cases by calendar date,
# one line per top-12 country, legend hidden.
plot_ly(
    df_plt_top12,
    x = ~date,
    y = ~confirmed,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

Confirmed cases by country

# Interactive line chart: cumulative deaths by calendar date,
# one line per top-12 country, legend hidden.
plot_ly(
    df_plt_top12,
    x = ~date,
    y = ~deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

Deaths by country

# Interactive line chart: death rate (deaths as a percentage of confirmed
# cases) by calendar date, one line per top-12 country, legend hidden.
plot_ly(
    df_plt_top12,
    x = ~date,
    y = ~death_rate,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

Death rate by country

# Interactive line chart: cumulative confirmed cases against days since each
# country's first confirmed case, one line per top-12 country, legend hidden.
plot_ly(
    df_plt_day1_top12,
    x = ~day_case1,
    y = ~confirmed,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

Confirmed cases since first case reported in each country

# Interactive line chart: cumulative deaths against days since each country's
# first confirmed case, one line per top-12 country, legend hidden.
plot_ly(
    df_plt_day1_top12,
    x = ~day_case1,
    y = ~deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

Deaths since first case reported in each country

# Interactive line chart: day-over-day change in confirmed cases by calendar
# date, one line per top-12 country, legend hidden.
plot_ly(
    df_plt_top12,
    x = ~date,
    y = ~new_cases,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

New cases reported each day by country

# Interactive line chart: day-over-day change in deaths by calendar date,
# one line per top-12 country, legend hidden.
plot_ly(
    df_plt_top12,
    x = ~date,
    y = ~new_deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

New deaths reported each day by country

# Smoothed trend of daily new cases against days since each country's first
# confirmed case, one curve per top-12 country, no confidence band, no legend.
# Line size is mapped to the `usa` flag.
# NOTE(review): presumably this draws the US curve at the top of the size
# range (1.5) and all others at the bottom (0) -- verify how size 0 renders.
ggplot(
    df_plt_day1_top12,
    aes(x = day_case1, y = new_cases, color = country, size = usa)
) +
    geom_smooth(se = FALSE) +
    scale_size_discrete(NULL, range = c(0, 1.5)) +
    scale_color_brewer(NULL, palette = "Paired") +
    theme_bg() +
    theme(legend.position = "none")
# Smoothed trend of daily new deaths against days since each country's first
# confirmed case, one curve per top-12 country, no confidence band, no legend.
# Line size is mapped to the `usa` flag.
# NOTE(review): presumably this draws the US curve at the top of the size
# range (1.5) and all others at the bottom (0) -- verify how size 0 renders.
ggplot(
    df_plt_day1_top12,
    aes(x = day_case1, y = new_deaths, color = country, size = usa)
) +
    geom_smooth(se = FALSE) +
    scale_size_discrete(NULL, range = c(0, 1.5)) +
    scale_color_brewer(NULL, palette = "Paired") +
    theme_bg() +
    theme(legend.position = "none")
# Animated world map: one marker per country located by country name, sized
# by confirmed cases, with one animation frame per date string. Drawn on a
# "natural earth" projection with the legend hidden.
plot_ly(
    df_plt,
    type = "scattergeo",
    locationmode = "country names",
    locations = ~country,
    size = ~confirmed,
    frame = ~str_date,
    showlegend = FALSE
) %>%
    layout(
        geo = list(
            projection = list(type = "natural earth")
        )
    )

Confirmed cases by country over time